1 Read data

# Load the serialised R objects used by the rest of this analysis.

# Participant-level data for both languages; split by `language` further down.
emilianto <- readRDS("data/rds/emilianto.rds")
# Supplies `age`, `dim_1`, `language` and the `educated:familiar` ratings used
# for the dim_1 scatter plot and the likert figure.
emilianto_attitude <- readRDS("data/rds/emilianto_attitude.rds")
# NOTE(review): `attitudes` is not referenced in the visible part of this file.
attitudes <- readRDS("data/rds/attitudes.rds")
# Space proportions plotted by gender (`pa_spaces`) and by type (`ty_spaces`).
pa_spaces <- readRDS("data/rds/pa_spaces.rds")
ty_spaces <- readRDS("data/rds/ty_spaces.rds")

2 Emilian

# Rows of `emilianto` whose `language` is "Emilian".
emilian <- filter(emilianto, language == "Emilian")

2.1 Participants

# Participant overview for the Emilian subset: gender, age, education,
# profession, and the language categories recorded for family and parents.

# Number of participants per gender.
emilian %>%
  ggplot(aes(gender)) +
  geom_bar()

# Age distribution. The bin width is set explicitly (5 units of `age`, as in
# the other age histograms of this file) rather than falling back on the
# 30-bin default that ggplot2 asks you to override.
emilian %>%
  ggplot(aes(age)) +
  geom_histogram(binwidth = 5)

# Smoothed view of the same age distribution.
emilian %>%
  ggplot(aes(age)) +
  geom_density()

# Number of participants per education level.
emilian %>%
  ggplot(aes(education)) +
  geom_bar()

# Age against education; vertical jitter separates overlapping points.
emilian %>%
  ggplot(aes(age, education)) +
  geom_jitter(height = 0.2, alpha = 0.5)

# The next three plots draw counts that count() has already computed, so
# geom_col() is used; bars are ordered from most to least frequent.
emilian %>%
  count(profession) %>%
  ggplot(aes(reorder(profession, -n), n)) +
  geom_col()

emilian %>%
  count(languages_family) %>%
  ggplot(aes(reorder(languages_family, -n), n)) +
  geom_col()

emilian %>%
  count(languages_parents) %>%
  ggplot(aes(reorder(languages_parents, -n), n)) +
  geom_col()

2.2 Language

2.2.1 Understand

# Distribution of the `comprehend` levels in the Emilian subset.

# Overall counts, one bar per level.
ggplot(emilian, aes(x = comprehend, fill = comprehend)) +
  geom_bar() +
  scale_fill_brewer(type = "div") +
  theme_dark()

# Split by gender: raw counts, then shares within each level.
ggplot(emilian, aes(x = comprehend, fill = gender)) +
  geom_bar()

ggplot(emilian, aes(x = comprehend, fill = gender)) +
  geom_bar(position = position_fill())

# Age histograms (bins of width 5), one facet row per level.
ggplot(emilian, aes(x = age, fill = comprehend)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(comprehend))

# Split by profession: raw counts, then shares within each level.
ggplot(emilian, aes(x = comprehend, fill = profession)) +
  geom_bar()

ggplot(emilian, aes(x = comprehend, fill = profession)) +
  geom_bar(position = position_fill())

2.2.2 Speak

# Distribution of the `speak` levels in the Emilian subset.

# Overall counts, one bar per level.
ggplot(emilian, aes(x = speak, fill = speak)) +
  geom_bar() +
  scale_fill_brewer(type = "div")

# Split by gender: raw counts, then shares within each level.
ggplot(emilian, aes(x = speak, fill = gender)) +
  geom_bar()

ggplot(emilian, aes(x = speak, fill = gender)) +
  geom_bar(position = position_fill())

# Age histograms (bins of width 5), one facet row per level.
ggplot(emilian, aes(x = age, fill = speak)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(speak))

# Split by profession: raw counts, then shares within each level.
ggplot(emilian, aes(x = speak, fill = profession)) +
  geom_bar()

ggplot(emilian, aes(x = speak, fill = profession)) +
  geom_bar(position = position_fill())

2.2.3 Read and write

# Distribution of the `read_write` levels in the Emilian subset.

# Overall counts, one bar per level.
ggplot(emilian, aes(x = read_write, fill = read_write)) +
  geom_bar()

# Split by gender: raw counts, then shares within each level.
ggplot(emilian, aes(x = read_write, fill = gender)) +
  geom_bar()

ggplot(emilian, aes(x = read_write, fill = gender)) +
  geom_bar(position = position_fill())

# Age histograms per level; rows with a missing `read_write` are removed
# first so they do not get a facet of their own.
ggplot(drop_na(emilian, read_write), aes(x = age, fill = read_write)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(read_write))

# Split by profession: raw counts, then shares within each level.
ggplot(emilian, aes(x = read_write, fill = profession)) +
  geom_bar()

ggplot(emilian, aes(x = read_write, fill = profession)) +
  geom_bar(position = position_fill())

2.2.4 Attitude

# Attitude ratings (columns `educated` to `familiar`) in the Emilian subset,
# reshaped to one row per participant and feature.

# Rating counts, one facet column per feature.
emilian %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(everything(), names_to = "feature", values_to = "rating") %>%
  ggplot(aes(x = as.factor(rating), fill = as.factor(rating))) +
  geom_bar() +
  scale_fill_brewer() +
  facet_grid(cols = vars(feature))

# Share of each rating within every feature.
emilian %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(everything(), names_to = "feature", values_to = "rating") %>%
  ggplot(aes(x = feature, fill = as.factor(rating))) +
  geom_bar(position = position_fill()) +
  scale_fill_brewer()

2.3 Urban vs Rural

# Add `ru_ur`, derived from the suffix embedded in `birth_place`:
# "-RU" -> "rural", "-UR" -> "urban", anything else (including a missing
# birth place) -> NA. "-RU" is tested first, as in the original nested test.
# case_when() replaces the nested ifelse() and always yields a character
# column, even when no row matches either pattern.
emil_rur <- emilian %>%
  mutate(
    ru_ur = case_when(
      str_detect(birth_place, "-RU") ~ "rural",
      str_detect(birth_place, "-UR") ~ "urban",
      TRUE ~ NA_character_
    )
  )

# Complete-case table for the rural/urban comparison: competence and attitude
# columns become ordered factors, then every row containing an NA is dropped.
emil_rur_clean <- emil_rur %>%
  dplyr::select(id, comprehend, speak, read_write, educated:familiar, ru_ur) %>%
  mutate(
    across(
      c(comprehend, speak),
      ~ ordered(.x, levels = c("NO", "AL", "50/50", "G", "VG"))
    ),
    across(educated:familiar, as.ordered)
  ) %>%
  drop_na()

# `comprehend` levels by rural/urban birth place (rows without a
# classification removed): raw counts, then shares within each group.
ggplot(drop_na(emil_rur, ru_ur), aes(x = ru_ur, fill = comprehend)) +
  geom_bar()

ggplot(drop_na(emil_rur, ru_ur), aes(x = ru_ur, fill = comprehend)) +
  geom_bar(position = position_fill())

3 Esperanto

# Rows of `emilianto` whose `language` is "Esperanto".
esperanto <- filter(emilianto, language == "Esperanto")

3.1 Participants

# Participant overview for the Esperanto subset: gender, age, education,
# profession, and the language categories recorded for the family.

# Number of participants per gender.
esperanto %>%
  ggplot(aes(gender)) +
  geom_bar()

# Age distribution. The bin width is set explicitly (5 units of `age`, as in
# the other age histograms of this file) rather than falling back on the
# 30-bin default that ggplot2 asks you to override.
esperanto %>%
  ggplot(aes(age)) +
  geom_histogram(binwidth = 5)

# Smoothed view of the same age distribution.
esperanto %>%
  ggplot(aes(age)) +
  geom_density()

# Number of participants per education level.
esperanto %>%
  ggplot(aes(education)) +
  geom_bar()

# Age against education; vertical jitter separates overlapping points.
esperanto %>%
  ggplot(aes(age, education)) +
  geom_jitter(height = 0.2, alpha = 0.5)

# The next two plots draw counts that count() has already computed, so
# geom_col() is used; bars are ordered from most to least frequent.
esperanto %>%
  count(profession) %>%
  ggplot(aes(reorder(profession, -n), n)) +
  geom_col()

esperanto %>%
  count(languages_family) %>%
  ggplot(aes(reorder(languages_family, -n), n)) +
  geom_col()

3.2 Language

3.2.1 Understand

# Distribution of the `comprehend` levels in the Esperanto subset.

# Overall counts, one bar per level.
ggplot(esperanto, aes(x = comprehend, fill = comprehend)) +
  geom_bar() +
  scale_fill_brewer(type = "div") +
  theme_dark()

# Split by gender: raw counts, then shares within each level.
ggplot(esperanto, aes(x = comprehend, fill = gender)) +
  geom_bar()

ggplot(esperanto, aes(x = comprehend, fill = gender)) +
  geom_bar(position = position_fill())

# Age histograms (bins of width 5), one facet row per level.
ggplot(esperanto, aes(x = age, fill = comprehend)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(comprehend))

# Split by profession: raw counts, then shares within each level.
ggplot(esperanto, aes(x = comprehend, fill = profession)) +
  geom_bar()

ggplot(esperanto, aes(x = comprehend, fill = profession)) +
  geom_bar(position = position_fill())

3.2.2 Speak

# Distribution of the `speak` levels in the Esperanto subset.

# Overall counts, one bar per level.
ggplot(esperanto, aes(x = speak, fill = speak)) +
  geom_bar() +
  scale_fill_brewer(type = "div")

# Split by gender: raw counts, then shares within each level.
ggplot(esperanto, aes(x = speak, fill = gender)) +
  geom_bar()

ggplot(esperanto, aes(x = speak, fill = gender)) +
  geom_bar(position = position_fill())

# Age histograms (bins of width 5), one facet row per level.
ggplot(esperanto, aes(x = age, fill = speak)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(speak))

# Split by profession: raw counts, then shares within each level.
ggplot(esperanto, aes(x = speak, fill = profession)) +
  geom_bar()

ggplot(esperanto, aes(x = speak, fill = profession)) +
  geom_bar(position = position_fill())

3.2.3 Read and write

# Counts of the `read_write` levels in the Esperanto subset.
ggplot(esperanto, aes(x = read_write, fill = read_write)) +
  geom_bar()

3.2.4 Attitude

# Attitude ratings (columns `educated` to `familiar`) in the Esperanto subset,
# reshaped to one row per participant and feature; missing ratings are
# dropped before plotting.

# Rating counts, one facet column per feature.
esperanto %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(everything(), names_to = "feature", values_to = "rating") %>%
  drop_na() %>%
  ggplot(aes(x = as.factor(rating), fill = as.factor(rating))) +
  geom_bar() +
  scale_fill_brewer() +
  facet_grid(cols = vars(feature))

# Share of each rating within every feature.
esperanto %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(everything(), names_to = "feature", values_to = "rating") %>%
  drop_na() %>%
  ggplot(aes(x = feature, fill = as.factor(rating))) +
  geom_bar(position = position_fill()) +
  scale_fill_brewer()

4 Dim-1 and age

# Scatter plot of `dim_1` against `age`, coloured by language. Because the
# colour mapping is set at the plot level, geom_smooth() fits one linear
# model per language. The model is given as a formula object rather than
# a character string, which is the form the `formula` argument documents.
emilianto_attitude %>%
  ggplot(aes(age, dim_1, colour = language)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)

5 Locations

# Coordinates of the Emilian participants' birth places, cached on disk:
# the CSV is read when it exists; otherwise the distinct places are passed to
# geocode() (method "osm") and the result is written out for the next run.
geo_path <- "./data/raw/geo.csv"

if (file.exists(geo_path)) {
  cat("Reading geocoding...\n")
  geo <- read_csv(geo_path)
} else {
  # Namespaced dplyr call, matching the dplyr::select() calls elsewhere in
  # this file, so this rarely executed branch does not depend on which
  # attached package currently owns `select`.
  birth_em <- dplyr::distinct(emilian, birth_place_it)
  geo <- geocode(
    birth_em,
    city = birth_place_it,
    method = "osm",
    verbose = TRUE
  )
  write_csv(geo, file = geo_path)
}
## Reading geocoding...
# European country outlines (sf object, medium scale) used as the base map.
europe <- ne_countries(
  scale = "medium",
  continent = "Europe",
  returnclass = "sf"
)

# Geocoded birth places on top of the map, cropped to longitude 7-14 and
# latitude 43-47.
ggplot() +
  geom_sf(data = europe) +
  geom_point(mapping = aes(x = long, y = lat), data = geo) +
  coord_sf(xlim = c(7, 14), ylim = c(43, 47))

6 Tables and plots included in the paper

# Demographic breakdown per language: for each variable in `columns`, print a
# wide table with the count (`n_*`) and the within-language percentage
# (`prop_*`, one decimal) of every category.
columns <- c("gender", "age_2", "profession_2", "languages_family_2")

for (column in columns) {
  emilianto %>%
    group_by(language) %>%
    count(.data[[column]]) %>%
    # Still grouped by language here, so sum(n) is the per-language total.
    mutate(
      prop = round(n / sum(n, na.rm = TRUE) * 100, 1)
    ) %>%
    # `names_from` is a tidyselect argument, where `.data` is deprecated;
    # all_of() selects the column named by the string in `column`.
    pivot_wider(names_from = all_of(column), values_from = c(n, prop)) %>%
    print()
}
## # A tibble: 2 × 7
## # Groups:   language [2]
##   language    n_F   n_M `n_LGBTQ+` prop_F prop_M `prop_LGBTQ+`
##   <chr>     <int> <int>      <int>  <dbl>  <dbl>         <dbl>
## 1 Emilian     269   165         NA   62     38            NA  
## 2 Esperanto    32   118          4   20.8   76.6           2.6
## # A tibble: 2 × 9
## # Groups:   language [2]
##   language  `n_0-20` `n_21-40` `n_41-60` `n_61+` `prop_0-20` `prop_21-40`
##   <chr>        <int>     <int>     <int>   <int>       <dbl>        <dbl>
## 1 Emilian        109       191       106      28        25.1         44  
## 2 Esperanto        8        39        53      54         5.2         25.3
## # ℹ 2 more variables: `prop_41-60` <dbl>, `prop_61+` <dbl>
## # A tibble: 2 × 11
## # Groups:   language [2]
##   language  `n_not in work` n_skilled n_student n_unskilled  n_NA
##   <chr>               <int>     <int>     <int>       <int> <int>
## 1 Emilian                45        95       153         138     3
## 2 Esperanto               4        62        20          21    47
## # ℹ 5 more variables: `prop_not in work` <dbl>, prop_skilled <dbl>,
## #   prop_student <dbl>, prop_unskilled <dbl>, prop_NA <dbl>
## # A tibble: 2 × 11
## # Groups:   language [2]
##   language  n_mono_e n_mono_o n_multi_e n_multi_o  n_NA prop_mono_e prop_mono_o
##   <chr>        <int>    <int>     <int>     <int> <int>       <dbl>       <dbl>
## 1 Emilian          2      261       112        57     2         0.5        60.1
## 2 Esperanto        1       91        33        29    NA         0.6        59.1
## # ℹ 3 more variables: prop_multi_e <dbl>, prop_multi_o <dbl>, prop_NA <dbl>
# Same demographic breakdown, restricted to participants whose `comprehend`
# AND `speak` are both one of "50/50", "G" or "VG".
columns <- c("gender", "age_2", "profession_2", "languages_family_2")

for (column in columns) {
  emilianto %>%
    filter(
      comprehend %in% c("50/50", "G", "VG"),
      speak %in% c("50/50", "G", "VG")
    ) %>%
    group_by(language) %>%
    count(.data[[column]]) %>%
    # Still grouped by language here, so sum(n) is the per-language total
    # of the filtered rows.
    mutate(
      prop = round(n / sum(n, na.rm = TRUE) * 100, 1)
    ) %>%
    # `names_from` is a tidyselect argument, where `.data` is deprecated;
    # all_of() selects the column named by the string in `column`.
    pivot_wider(names_from = all_of(column), values_from = c(n, prop)) %>%
    print()
}
## # A tibble: 2 × 7
## # Groups:   language [2]
##   language    n_F   n_M `n_LGBTQ+` prop_F prop_M `prop_LGBTQ+`
##   <chr>     <int> <int>      <int>  <dbl>  <dbl>         <dbl>
## 1 Emilian     160   100         NA   61.5   38.5          NA  
## 2 Esperanto    28   112          4   19.4   77.8           2.8
## # A tibble: 2 × 9
## # Groups:   language [2]
##   language  `n_0-20` `n_21-40` `n_41-60` `n_61+` `prop_0-20` `prop_21-40`
##   <chr>        <int>     <int>     <int>   <int>       <dbl>        <dbl>
## 1 Emilian         63       113        62      22        24.2         43.5
## 2 Esperanto        6        36        48      54         4.2         25  
## # ℹ 2 more variables: `prop_41-60` <dbl>, `prop_61+` <dbl>
## # A tibble: 2 × 11
## # Groups:   language [2]
##   language  `n_not in work` n_skilled n_student n_unskilled  n_NA
##   <chr>               <int>     <int>     <int>       <int> <int>
## 1 Emilian                29        61        88          80     2
## 2 Esperanto               2        60        18          20    44
## # ℹ 5 more variables: `prop_not in work` <dbl>, prop_skilled <dbl>,
## #   prop_student <dbl>, prop_unskilled <dbl>, prop_NA <dbl>
## # A tibble: 2 × 11
## # Groups:   language [2]
##   language  n_mono_e n_mono_o n_multi_e n_multi_o  n_NA prop_mono_e prop_mono_o
##   <chr>        <int>    <int>     <int>     <int> <int>       <dbl>       <dbl>
## 1 Emilian          2      157        70        29     2         0.8        60.4
## 2 Esperanto        1       82        33        28    NA         0.7        56.9
## # ℹ 3 more variables: prop_multi_e <dbl>, prop_multi_o <dbl>, prop_NA <dbl>
# Demographic breakdown split by competence group (`und_speak`), with
# percentages expressed relative to all participants of the same language.
columns <- c("gender", "age_2", "profession_2", "languages_family_2")

# `und_speak` is ">50" when EITHER `comprehend` or `speak` is one of
# "50/50", "G" or "VG", and "<50" otherwise.
# NOTE(review): the filtered tables earlier in this file require BOTH
# conditions; confirm the either/or rule is the intended one here.
emilianto_50 <- emilianto %>%
  mutate(
    und_speak = case_when(
      comprehend %in% c("50/50", "G", "VG") ~ ">50",
      speak %in% c("50/50", "G", "VG") ~ ">50",
      TRUE ~ "<50"
    )
  )

for (column in columns) {
  emilianto_50 %>%
    # `tot` = number of rows per language, the denominator of `prop`.
    group_by(language) %>%
    add_count(name = "tot") %>%
    ungroup() %>%
    count(.data[[column]], und_speak, language, tot) %>%
    mutate(
      prop = round((n / tot) * 100, 1)
    ) %>%
    # `names_from` is a tidyselect argument, where `.data` is deprecated;
    # all_of() selects the column named by the string in `column`.
    pivot_wider(names_from = all_of(column), values_from = c(n, prop)) %>%
    print()
}
## # A tibble: 4 × 9
##   und_speak language    tot   n_F `n_LGBTQ+`   n_M prop_F `prop_LGBTQ+` prop_M
##   <chr>     <chr>     <int> <int>      <int> <int>  <dbl>         <dbl>  <dbl>
## 1 <50       Emilian     434    24         NA    13    5.5          NA      3  
## 2 <50       Esperanto   154     2         NA     3    1.3          NA      1.9
## 3 >50       Emilian     434   245         NA   152   56.5          NA     35  
## 4 >50       Esperanto   154    30          4   115   19.5           2.6   74.7
## # A tibble: 4 × 11
##   und_speak language    tot `n_0-20` `n_21-40` `n_41-60` `n_61+` `prop_0-20`
##   <chr>     <chr>     <int>    <int>     <int>     <int>   <int>       <dbl>
## 1 <50       Emilian     434        7        23         7      NA         1.6
## 2 >50       Emilian     434      102       168        99      28        23.5
## 3 >50       Esperanto   154        8        37        50      54         5.2
## 4 <50       Esperanto   154       NA         2         3      NA        NA  
## # ℹ 3 more variables: `prop_21-40` <dbl>, `prop_41-60` <dbl>, `prop_61+` <dbl>
## # A tibble: 4 × 13
##   und_speak language   tot `n_not in work` n_skilled n_student n_unskilled  n_NA
##   <chr>     <chr>    <int>           <int>     <int>     <int>       <int> <int>
## 1 <50       Emilian    434               2         9        15          11    NA
## 2 <50       Esperan…   154               2         1        NA          NA     2
## 3 >50       Emilian    434              43        86       138         127     3
## 4 >50       Esperan…   154               2        61        20          21    45
## # ℹ 5 more variables: `prop_not in work` <dbl>, prop_skilled <dbl>,
## #   prop_student <dbl>, prop_unskilled <dbl>, prop_NA <dbl>
## # A tibble: 4 × 13
##   und_speak language    tot n_mono_e n_mono_o n_multi_e n_multi_o  n_NA
##   <chr>     <chr>     <int>    <int>    <int>     <int>     <int> <int>
## 1 >50       Emilian     434        2      241       102        50     2
## 2 >50       Esperanto   154        1       86        33        29    NA
## 3 <50       Emilian     434       NA       20        10         7    NA
## 4 <50       Esperanto   154       NA        5        NA        NA    NA
## # ℹ 5 more variables: prop_mono_e <dbl>, prop_mono_o <dbl>, prop_multi_e <dbl>,
## #   prop_multi_o <dbl>, prop_NA <dbl>
# Share of each competence level for `comprehend` and `speak`, one panel per
# language; the most recent plot is then saved as ./img/competence.png.
emilianto %>%
  pivot_longer(
    cols = c(comprehend, speak),
    names_to = "competence",
    values_to = "level"
  ) %>%
  mutate(
    # Fix the bar order on the x axis: comprehend first, then speak.
    competence = factor(competence, levels = c("comprehend", "speak"))
  ) %>%
  ggplot(aes(x = competence, fill = level)) +
  geom_bar(position = position_fill()) +
  scale_fill_brewer(type = "div", palette = "PRGn") +
  facet_grid(cols = vars(language)) +
  labs(y = "Proportion")

ggsave(filename = "./img/competence.png", width = 7, height = 5)
# Violin plots of `proportion` by gender, with a narrow black box plot and a
# white median point overlaid; facet rows are `spaces`, columns `language`.
# The most recent plot is then saved as ./img/spaces.png.
pa_spaces %>%
  ggplot(aes(gender, proportion, fill = gender)) +
  # Reference lines at the bounds of a proportion. `linewidth` replaces
  # `size`, which is deprecated for line geoms since ggplot2 3.4.0.
  geom_hline(yintercept = 0, linewidth = 0.25) +
  geom_hline(yintercept = 1, linewidth = 0.25) +
  geom_violin(colour = NA, bw = 0.1) +
  geom_boxplot(width = 0.05, fill = "black", colour = "black") +
  stat_summary(colour = "white", fun = "median", geom = "point") +
  ylim(0, 1) +
  # NOTE(review): as_labeller() is documented for functions, formulas and
  # named character vectors; confirm that this list argument relabels the
  # facets as intended.
  facet_grid(spaces ~ language, labeller = as_labeller(list("competence" = c("a", "b")))) +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  labs(
    y = "Spaces (proportion)",
    x = "Gender"
  ) +
  theme_minimal(base_size = 16) +
  theme(legend.position = "none")

ggsave("./img/spaces.png", width = 7, height = 5)
# Violin plots of `proportion` by space type, with a narrow black box plot
# and a white median point overlaid; one panel per language. The most recent
# plot is then saved as ./img/space-types.png.
ty_spaces %>%
  ggplot(aes(types, proportion, fill = types)) +
  # Reference lines at the bounds of a proportion. `linewidth` replaces
  # `size`, which is deprecated for line geoms since ggplot2 3.4.0.
  geom_hline(yintercept = 0, linewidth = 0.25) +
  geom_hline(yintercept = 1, linewidth = 0.25) +
  geom_violin(colour = NA) +
  geom_boxplot(width = 0.05, fill = "black", colour = "black") +
  stat_summary(colour = "white", fun = "median", geom = "point") +
  ylim(0, 1) +
  facet_grid(~ language) +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  labs(
    y = "Total spaces (proportion)",
    x = "Space type"
  ) +
  theme_minimal(base_size = 16) +
  theme(legend.position = "none")

ggsave("./img/space-types.png", width = 7, height = 5)
# Number of distinct participant ids per language in `ty_spaces`.
ty_spaces %>%
  dplyr::distinct(language, id) %>%
  count(language)
## # A tibble: 2 × 2
##   language      n
##   <chr>     <int>
## 1 Emilian     434
## 2 Esperanto   150
# One constant per space type.
# NOTE(review): presumably the number of spaces listed per type; the values
# are matched to rows by position below, so confirm their order against the
# row order of `ty_spaces_count`.
n_spaces <- c(42, 2, 2, 13, 23, 12, 11)

# Scale by the number of participants per language (434 Emilian and
# 150 Esperanto, the distinct-id counts of `ty_spaces`).
n_spaces_em <- 434 * n_spaces
n_spaces_eo <- 150 * n_spaces

# Total count per language and space type, computed from one row per
# participant.
ty_spaces_count <- ty_spaces %>%
  dplyr::select(language, id, ends_with("_spaces")) %>%
  distinct() %>%
  pivot_longer(
    cols = passive_spaces:other_spaces,
    names_to = "space_type",
    values_to = "count"
  ) %>%
  group_by(language, space_type) %>%
  summarise(total_count = sum(count), .groups = "drop")

# Positional assignment: Emilian values first, then Esperanto.
ty_spaces_count[["n_spaces"]] <- c(n_spaces_em, n_spaces_eo)

# Percentage of the scaled constant (rounded to a whole number), with the
# "_spaces" suffix stripped from the type names.
ty_spaces_perc <- ty_spaces_count %>%
  mutate(
    perc = round(100 * (total_count / n_spaces)),
    space_type = str_remove(space_type, "_spaces")
  )
# Bar chart of `perc` per space type and language, leaving out the "passive"
# and "active" rows; the most recent plot is then saved as
# ./img/space-types-perc.png.
ty_spaces_perc %>%
  filter(!space_type %in% c("passive", "active")) %>%
  mutate(
    # Fix the left-to-right order of the bars.
    space_type = factor(
      space_type,
      levels = c("physical", "media", "press", "virtual", "other")
    )
  ) %>%
  ggplot(aes(x = space_type, y = perc, fill = space_type)) +
  geom_col() +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  facet_grid(cols = vars(language)) +
  ylim(0, 100) +
  labs(x = "Space type", y = "Percent") +
  theme(legend.position = "none")

ggsave(filename = "./img/space-types-perc.png", width = 7, height = 5)
# Wide table of rating counts: one row per language and quality, one column
# per score (1-5), renamed to the agreement labels shown in the figure.
emilianto_lik <- emilianto_attitude %>%
  dplyr::select(language, educated:familiar) %>%
  pivot_longer(educated:familiar, names_to = "quality", values_to = "score") %>%
  count(language, quality, score) %>%
  # A score that never occurs for a language/quality pair is a count of
  # zero, not a missing value.
  pivot_wider(names_from = "score", values_from = n, values_fill = 0L) %>%
  rename(
    "strong disagree" = `1`,
    "disagree" = `2`,
    "neither" = `3`,
    "agree" = `4`,
    "strong agree" = `5`
  )

# Diverging stacked bar chart, one panel per language, written to
# ./img/likert.png (7 x 5 in, 600 dpi).
png("./img/likert.png", width = 7, height = 5, units = "in", res = 600)
# Printed explicitly so the figure is drawn on the png device even when this
# code runs without top-level auto-printing (e.g. via source()).
print(
  likert(
    quality ~ . | language,
    emilianto_lik,
    as.percent = TRUE,
    main = ""
  )
)
dev.off()
## quartz_off_screen 
##                 2